# NOTE(review): machine-specific absolute path. Every relative path used
# below is resolved against it, so the script only runs as-is on a machine
# that has this directory.
setwd("C:/Users/sijia/Desktop/current working dictionary/代谢组")

# Metabolite data ----
meta <- read.csv(file = "data/phenotype/metabolity_4681.csv")
dim(meta)  # recorded output: 4681 902

# Column layout of `meta`, as recorded by the original author:
names(meta)[1:227]    # studyid, status, original metabolites
names(meta)[228:452]  # log-transformed metabolites
names(meta)[453:677]  # log/sd-transformed metabolites
names(meta)[678:902]  # int-transformed metabolites
                      # (presumably inverse-normal transformed -- TODO confirm)

# Quick look at the first int-transformed metabolite (column 678).
hist(meta[, 678])

# Check missing values: in that column, then across whole rows.
table(is.na(meta[, 678]))
table(complete.cases(meta))  # recorded output: FALSE = 511

# Lifestyle exposures and covariates ----
lifestyle_cov <- read.csv("data/lifestyle_covariates_4681.csv")
dim(lifestyle_cov)

# The two tables are combined column-wise, so they must hold the same
# participants in the same order. Enforce this instead of only printing the
# comparison: a mismatch (or an NA studyid) would otherwise silently pair
# metabolite values with another participant's covariates.
stopifnot(
  nrow(meta) == nrow(lifestyle_cov),
  all(meta$studyid == lifestyle_cov$studyid)
)
data <- cbind(meta, lifestyle_cov)

# `control_only` is intentionally not built here: it is created further down,
# after region_code and education have been converted to factors, and nothing
# in between uses it.

# Columns 678:902 of the combined table still hold the int-transformed
# metabolites, because the columns of `meta` come first in the cbind().
colnames(data)[678:902]
# Names of the 225 NMR metabolic traits, used as row labels of the result
# tables. The vector is assembled from its three regular parts inside local()
# so that no helper variables are left behind in the workspace.
metabolity_name <- local({
  # 14 lipoprotein subclasses, in the order they appear in the trait list.
  subclasses <- c(
    "xxl_vldl", "xl_vldl", "l_vldl", "m_vldl", "s_vldl", "xs_vldl",
    "idl",
    "l_ldl", "m_ldl", "s_ldl",
    "xl_hdl", "l_hdl", "m_hdl", "s_hdl"
  )

  # Part 1: seven measures per subclass (14 x 7 = 98 names).
  measures <- c("p", "l", "pl", "c", "ce", "fc", "tg")
  subclass_measures <- paste0(
    "nmr_",
    rep(subclasses, each = length(measures)),
    "_",
    rep(measures, times = length(subclasses))
  )

  # Part 2: five "_per" traits per subclass (14 x 5 = 70 names).
  ratio_measures <- c("pl", "c", "ce", "fc", "tg")
  subclass_ratios <- paste0(
    "nmr_",
    rep(subclasses, each = length(ratio_measures)),
    "_",
    rep(ratio_measures, times = length(subclasses)),
    "_per"
  )

  # Part 3: the remaining 57 traits, which follow no common naming pattern.
  other_traits <- paste0(
    "nmr_",
    c(
      "vldl_d", "ldl_d", "hdl_d",
      "serum_c", "vldl_c", "remnant_c", "ldl_c", "hdl_c", "hdl2_c", "hdl3_c",
      "estc", "freec",
      "serum_tg", "vldl_tg", "ldl_tg", "hdl_tg",
      "totpg", "tg_pg", "pc", "sm", "totcho",
      "apoa1", "apob", "apob_apoa1",
      "totfa", "unsat", "dha", "la", "faw3", "faw6", "pufa", "mufa", "sfa",
      "dha_fa", "la_fa", "faw3_fa", "faw6_fa", "pufa_fa", "mufa_fa", "sfa_fa",
      "glc", "lac", "cit",
      "ala", "gln", "his", "ile", "leu", "val", "phe", "tyr",
      "ace", "acace", "bohbut", "crea", "alb", "gp"
    )
  )

  c(subclass_measures, subclass_ratios, other_traits)
})

# Inspect how the adjustment variables were read in from the CSV files.
class(data$status_update)
class(data$region_code)
class(data$education)
class(data$fasting_time)

# Treat region and education as categorical so that lm() expands them into
# indicator terms. status_update and fasting_time are left as read.
data$region_code <- factor(data$region_code)
data$education <- factor(data$education)

# Control-only subset, taken after the factor conversions above.
control_only <- data[data$status_update == "control", ]

# Result tables for the two reported healthy_score_3g2 terms ("2_3" and
# "4_5"): one row per metabolite, six columns each (estimate, standard error,
# test statistic, p-value, lower and upper confidence limit). All cells start
# as NA and are filled in by the regression loop.
healthylif3gs_result2_3 <- matrix(
  NA,
  nrow = 225,
  ncol = 6,
  dimnames = list(
    metabolity_name,
    c("estimate2_3", "se2_3", "zvalue2_3", "pr2_3", "lci2_3", "uci2_3")
  )
)
healthylif3gs_result4_5 <- matrix(
  NA,
  nrow = 225,
  ncol = 6,
  dimnames = list(
    metabolity_name,
    c("estimate4_5", "se4_5", "zvalue4_5", "pr4_5", "lci4_5", "uci4_5")
  )
)

# Score variant used here: excludes the diet component "rice and wheat < daily".
# Adjustment: age (continuous), sex, fasting time (<8 or >=8 h), region
# (10 regions), education (3 categories: no formal or primary school, middle
# or high school, technical school or college or university), and
# case/control status (dichotomous).
#
# For each of the 225 int-transformed metabolites (columns 678:902 of `data`)
# fit a linear model on healthy_score_3g2 plus the covariates above and store,
# for each of the two healthy_score_3g2 terms, the estimate, standard error,
# test statistic, p-value and 95% confidence limits.
#
# The healthy_score_3g2 rows are looked up by coefficient name rather than by
# position (rows 2 and 3). Positional lookup silently returns the
# age_at_study_date coefficient as the "4_5" result when healthy_score_3g2 is
# not a three-level categorical variable, and it can pair a coefficient with
# another term's confidence interval, because coef(summary()) drops
# inestimable (aliased) terms while confint() keeps them.
for (i in seq_len(225)) {
  hl3gs_model <- lm(
    data[, 678 + i - 1] ~ healthy_score_3g2 + age_at_study_date + is_female +
      education + fasting_time + region_code + status_update,
    data = data
  )

  # Compute the summary table and the intervals once per model.
  model_coefs <- coef(summary(hl3gs_model))
  model_ci <- confint(hl3gs_model, level = 0.95)

  # Non-reference levels of healthy_score_3g2, in model order.
  score_terms <- grep("^healthy_score_3g2", rownames(model_ci), value = TRUE)
  if (length(score_terms) != 2L) {
    stop(
      "Expected exactly 2 healthy_score_3g2 coefficients (a 3-level ",
      "categorical variable) but found ", length(score_terms),
      "; check how healthy_score_3g2 is coded.",
      call. = FALSE
    )
  }

  # A term missing from the summary table (aliased) yields an NA row index,
  # which makes the four summary statistics NA instead of another term's.
  coef_rows <- match(score_terms, rownames(model_coefs))

  healthylif3gs_result2_3[i, ] <- c(
    model_coefs[coef_rows[1], ],
    model_ci[score_terms[1], ]
  )
  healthylif3gs_result4_5[i, ] <- c(
    model_coefs[coef_rows[2], ],
    model_ci[score_terms[2], ]
  )
}
# FDR adjustment of the p-values across the 225 metabolites, done separately
# for the "2_3" and "4_5" terms. The p-value column is selected by name, and
# `n` is passed by name with the same value as before (the full number of
# rows). No matrix is preallocated for the adjusted values: p.adjust() returns
# a vector named by metabolite, which replaced the preallocated matrix anyway.
fdr2_3 <- p.adjust(
  healthylif3gs_result2_3[, "pr2_3"],
  method = "fdr",
  n = nrow(healthylif3gs_result2_3)
)
fdr4_5 <- p.adjust(
  healthylif3gs_result4_5[, "pr4_5"],
  method = "fdr",
  n = nrow(healthylif3gs_result4_5)
)

# Write results for healthylif3gs: both result tables side by side, each
# followed by its FDR column (named explicitly as "fdr2_3" / "fdr4_5").
healthylif3gs_result <- cbind(
  healthylif3gs_result2_3,
  fdr2_3 = fdr2_3,
  healthylif3gs_result4_5,
  fdr4_5 = fdr4_5
)
write.csv(healthylif3gs_result, file = "0108_healthylif3gs2.csv")